import pickle
import os
# Permanent scope names: each key is a scope label and each value is the
# list of category-code strings that belong to that scope.
scope = {
    'gcat_small': ['GDEF', 'GCRIM', 'GTOUR'],
    'gcat_big': [
        'GDEF', 'GCRIM', 'GTOUR', 'GENV', 'GENT', 'GVIO', 'GWEA',
        'GSPO', 'GHEA', 'GSCI', 'GREL', 'GDIS', 'GODD', 'GFAS',
    ],
    'sim': ['GSPO', 'GTOUR', 'GWELF'],
    'diff': ['GSPO', 'GSCI', 'GHEA'],
}

# Input file path (relative to the current working directory).
data_path = 'GCAT_modified.hin'

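# A simple stop word list. The triple-quoted string below holds an older list that is no longer assigned; the active stop_words list follows it.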
'''
stop_words = ["a", "about", "above", "above", "across", "after", "afterwards", "again", "against", "all", "almost",
              "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount",
              "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as",
              "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand",
              "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but",
              "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail",
              "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere",
              "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few",
              "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found",
              "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he",
              "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself",
              "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it",
              "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may",
              "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must",
              "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody",
              "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one",
              "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
              "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming",
              "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so",
              "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system",
              "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there",
              "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thickv", "thin", "third",
              "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too",
              "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very",
              "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where",
              "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while",
              "whither", "who", "whoever", "whole", "whom", "whose","why", "will", "with", "within", "without", "would",
              "yet", "you", "your", "yours", "yourself", "yourselves", "the", "i'm", "you're", "he's", "she's", "it's",
               "we're", "they're", "i've", "you've", "we've", "they've", "i'd", "you'd", "he'd", "she'd", "we'd",
               "they'd", "i'll", "you'll", "he'll", "she'll", "we'll", "they'll", "isn't", "aren't", "wasn't", "weren't",
                "hasn't", "haven't", "hadn't", "doesn't", "don't", "didn't", "won't", "wouldn't", "shan't", "shouldn't",
                 "can't", "cannot", "couldn't", "mustn't", "let's", "that's", "who's", "what's", "here's", "there's",
                  "when's", "where's", "why's", "how's", ".", ",", "-","i","say","says","said","kg","cm"]
'''
stop_words = [',', '.', ';', '!', ':', '?', 'able', 'about', 'above', 'abroad', 'according', 'accord', 'accordingly', 'across', 'actually', 'adj', 'after', 'afterwards', 'again', 'against', 'ago', 'ahead', 'ai', 'be', "n't", 'not', 'all', 'allow', 'allows', 'allow', 'almost', 'alone', 'along', 'alongside', 'already', 'also', 'although', 'always', 'am', 'amid', 'amidst', 'among', 'amongst', 'an', 'a', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', 'anyways', 'anyway', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'be', 'are', 'be', "n't", 'not', 'around', 'as', 'a', "'s", 'aside', 'ask', 'asking', 'ask', 'associated', 'associate', 'at', 'available', 'away', 'awfully', 'back', 'backward', 'backwards', 'be', 'became', 'become', 'because', 'become', 'becomes', 'become', 'becoming', 'become', 'been', 'be', 'before', 'beforehand', 'begin', 'behind', 'being', 'be', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'came', 'come', 'can', 'can', 'not', 'cant', 'ca', 'can', "n't", 'not', 'caption', 'cause', 'causes', 'cause', 'certain', 'certainly', 'changes', 'change', 'clearly', "c'mon", 'co', 'co.', '.', 'com', 'come', 'comes', 'come', 'concerning', 'concern', 'consequently', 'consider', 'considering', 'consider', 'contain', 'containing', 'contain', 'contains', 'contain', 'corresponding', 'correspond', 'could', 'could', "n't", 'not', 'course', 'c', "'s", 'currently', 'dare', 'dare', "n't", 'not', 'definitely', 'described', 'describe', 'despite', 'did', 'do', 'did', 'do', "n't", 'not', 'different', 'directly', 'do', 'does', 'do', 'does', 'do', "n't", 'not', 'doing', 'do', 'done', 'do', 'do', "n't", 'not', 'down', 'downwards', 'downward', 'during', 'each', 'edu', 'eg', 'eight', 'eighty', 'either', 'else', 'elsewhere', 'end', 'ending', 'end', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'evermore', 'every', 'everybody', 'everyone', 'everything', 
'everywhere', 'ex', 'exactly', 'example', 'except', 'fairly', 'far', 'farther', 'few', 'fewer', 'fifth', 'first', 'five', 'followed', 'follow', 'following', 'follow', 'follows', 'follow', 'for', 'forever', 'former', 'formerly', 'forth', 'forward', 'found', 'find', 'four', 'from', 'further', 'furthermore', 'get', 'gets', 'get', 'getting', 'get', 'given', 'give', 'gives', 'give', 'go', 'goes', 'go', 'going', 'go', 'gone', 'go', 'got', 'get', 'gotten', 'get', 'greetings', 'greeting', 'had', 'have', 'had', 'have', "n't", 'not', 'half', 'happens', 'happen', 'hardly', 'has', 'have', 'has', 'have', "n't", 'not', 'have', 'have', "n't", 'not', 'having', 'have', 'he', 'he', "'d", 'would', 'he', '`', 'll', 'hello', 'help', 'hence', 'her', 'she', 'here', 'hereafter', 'hereby', 'herein', 'here', "'s", 'hereupon', 'hers', 'herself', 'he', "'s", 'be', 'hi', 'him', 'he', 'himself', 'his', 'he', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'hundred', 'i', "'d", 'would', 'ie', 'if', 'ignored', 'ignore', 'i', '`', 'll', 'i', "'m", 'be', 'immediate', 'in', 'inasmuch', 'inc', 'inc.', '.', 'indeed', 'indicate', 'indicated', 'indicate', 'indicates', 'indicate', 'inner', 'inside', 'insofar', 'instead', 'into', 'inward', 'is', 'be', 'is', 'be', "n't", 'not', 'it', 'it', "'d", 'would', 'it', '`', 'll', 'its', 'it', "'s", 'be', 'itself', 'i', '`', 've', 'just', 'k', 'keep', 'keeps', 'keep', 'kept', 'keep', 'know', 'known', 'know', 'knows', 'know', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let', "'s", 'like', 'liked', 'like', 'likely', 'likewise', 'little', 'look', 'looking', 'look', 'looks', 'look', 'low', 'lower', 'ltd', 'made', 'make', 'mainly', 'make', 'makes', 'make', 'many', 'may', 'maybe', 'may', "n't", 'not', 'me', 'i', 'mean', 'meantime', 'meanwhile', 'merely', 'might', 'might', "n't", 'not', 'mine', 'minus', 'miss', 'more', 'moreover', 'most', 'mostly', 'mr', 'mrs', 'mr', 'much', 'must', 'must', "n't", 'not', 'my', 'myself', 'name', 
'namely', 'nd', 'near', 'nearly', 'necessary', 'need', 'need', "n't", 'not', 'needs', 'need', 'neither', 'never', 'neverf', 'neverless', 'nevertheless', 'new', 'next', 'nine', 'ninety', 'no', 'nobody', 'non', 'none', 'nonetheless', 'noone', 'no-one', 'nor', 'normally', 'not', 'nothing', 'notwithstanding', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'on', 'once', 'one', 'ones', 'one', 'one', "'s", 'only', 'onto', 'opposite', 'or', 'other', 'others', 'other', 'otherwise', 'ought', 'ought', "n't", 'not', 'our', 'we', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'particular', 'particularly', 'past', 'per', 'perhaps', 'placed', 'place', 'please', 'plus', 'possible', 'presumably', 'probably', 'provided', 'provide', 'provides', 'provide', 'que', 'quite', 'qv', 'rather', 'rd', 're', 'really', 'reasonably', 'recent', 'recently', 'regarding', 'regard', 'regardless', 'regards', 'regard', 'relatively', 'respectively', 'right', 'round', 'said', 'say', 'same', 'saw', 'say', 'saying', 'say', 'says', 'say', 'second', 'secondly', 'see', 'seeing', 'see', 'seem', 'seemed', 'seem', 'seeming', 'seem', 'seems', 'seem', 'seen', 'see', 'self', 'selves', 'self', 'sensible', 'sent', 'send', 'serious', 'seriously', 'seven', 'several', 'shall', 'sha', "n't", 'not', 'she', 'she', "'d", 'would', 'she', '`', 'll', 'she', "'s", 'be', 'should', 'should', "n't", 'not', 'since', 'six', 'so', 'some', 'somebody', 'someday', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specify', 'specifying', 'specify', 'still', 'sub', 'such', 'sup', 'sure', 'take', 'taken', 'take', 'taking', 'take', 'tell', 'tends', 'tend', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that', '`', 'll', 'thats', 'that', 'that', "'s", 'be', 'that', '`', 've', 'the', 'their', 'they', 'theirs', 'them', 'they', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'there', 
"'d", 'would', 'therefore', 'therein', 'there', '`', 'll', 'there', '`', 're', 'be', 'theres', 'there', "'s", 'be', 'thereupon', 'there', '`', 've', 'these', 'they', 'they', "'d", 'would', 'they', '`', 'll', 'they', '`', 're', 'be', 'they', '`', 've', 'thing', 'things', 'thing', 'think', 'third', 'thirty', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'till', 'to', 'together', 'too', 'took', 'take', 'toward', 'towards', 'tried', 'try', 'tries', 'try', 'truly', 'try', 'trying', 'try', 't', "'s", 'twice', 'two', 'un', 'under', 'underneath', 'undoing', 'unfortunately', 'unless', 'unlike', 'unlikely', 'until', 'unto', 'up', 'upon', 'upwards', 'upward', 'us', 'we', 'use', 'used', 'use', 'useful', 'uses', 'use', 'using', 'use', 'usually', 'v', 'value', 'various', 'versus', 'very', 'via', 'viz', 'vs', 'want', 'wants', 'want', 'was', 'be', 'was', 'be', "n't", 'not', 'way', 'we', 'we', "'d", 'would', 'welcome', 'well', 'we', '`', 'll', 'went', 'go', 'were', 'be', 'we', '`', 're', 'be', 'were', 'be', "n't", 'not', 'we', '`', 've', 'what', 'whatever', 'what', '`', 'll', 'what', "'s", 'be', 'what', '`', 've', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'where', "'s", 'whereupon', 'wherever', 'whether', 'which', 'whichever', 'while', 'whilst', 'whither', 'who', 'who', "'d", 'have', 'whoever', 'whole', 'who', '`', 'll', 'whom', 'whomever', 'who', "'s", 'be', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within', 'without', 'wonder', 'wo', 'will', "n't", 'not', 'would', 'would', "n't", 'not', 'yes', 'yet', 'you', 'you', "'d", 'would', 'you', '`', 'll', 'your', 'you', 'you', '`', 're', 'be', 'yours', 'yourself', 'yourselves', 'yourselve', 'you', '`', 've', 'zero', 'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'a', 'and', 'any', 'are', 'be', 'are', 'be', "n't", 'not', 'as', 'at', 'be', 'because', 'been', 'be', 'before', 'being', 'be', 'below', 'between', 
'both', 'but', 'by', 'ca', 'can', "n't", 'not', 'can', 'not', 'could', 'could', "n't", 'not', 'did', 'do', 'did', 'do', "n't", 'not', 'do', 'does', 'do', 'does', 'do', "n't", 'not', 'doing', 'do', 'do', "n't", 'not', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'have', 'had', 'have', "n't", 'not', 'has', 'have', 'has', 'have', "n't", 'not', 'have', 'have', "n't", 'not', 'having', 'have', 'he', 'he', "'d", 'would', 'he', '`', 'll', 'he', "'s", 'be', 'her', 'she', 'here', 'here', "'s", 'hers', 'herself', 'him', 'he', 'himself', 'his', 'he', 'how', 'how', "'s", 'i', 'i', "'d", 'would', 'i', '`', 'll', 'i', "'m", 'be', 'i', '`', 've', 'if', 'in', 'into', 'is', 'be', 'is', 'be', "n't", 'not', 'it', 'it', "'s", 'be', 'its', 'itself', 'let', "'s", 'me', 'i', 'more', 'most', 'must', "n't", 'not', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'we', 'ours', 'ourselves', 'out', 'over', 'own', 'same', 'sha', "n't", 'not', 'she', 'she', "'d", 'would', 'she', '`', 'll', 'she', "'s", 'be', 'should', 'should', "n't", 'not', 'so', 'some', 'such', 'than', 'that', 'that', "'s", 'be', 'the', 'their', 'they', 'theirs', 'them', 'they', 'themselves', 'then', 'there', 'there', "'s", 'be', 'these', 'they', 'they', "'d", 'would', 'they', '`', 'll', 'they', '`', 're', 'be', 'they', '`', 've', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'be', 'was', 'be', "n't", 'not', 'we', 'we', "'d", 'would', 'we', '`', 'll', 'we', '`', 're', 'be', 'we', '`', 've', 'were', 'be', 'were', 'be', "n't", 'not', 'what', 'what', "'s", 'be', 'when', 'when', "'s", 'where', 'where', "'s", 'which', 'while', 'who', 'who', "'s", 'be', 'whom', 'why', 'why', "'s", 'with', 'wo', 'will', "n't", 'not', 'would', 'would', "n't", 'not', 'you', 'you', "'d", 'would', 'you', '`', 'll', 'you', '`', 're', 'be', 'you', '`', 've', 'your', 'you', 'yours', 'yourself', 'yourselves', 'yourselve', 'a', 'a', "'s", 'able', 
'about', 'above', 'according', 'accord', 'accordingly', 'across', 'actually', 'after', 'afterwards', 'again', 'against', 'ai', 'be', "n't", 'not', 'all', 'allow', 'allows', 'allow', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'an', 'a', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', 'anyways', 'anyway', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'be', 'are', 'be', "n't", 'not', 'around', 'as', 'aside', 'ask', 'asking', 'ask', 'associated', 'associate', 'at', 'available', 'away', 'awfully', 'b', 'be', 'became', 'become', 'because', 'become', 'becomes', 'become', 'becoming', 'become', 'been', 'be', 'before', 'beforehand', 'behind', 'being', 'be', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c', "c'mon", 'c', "'s", 'came', 'come', 'can', 'ca', 'can', "n't", 'not', 'can', 'not', 'cant', 'cause', 'causes', 'cause', 'certain', 'certainly', 'changes', 'change', 'clearly', 'co', 'com', 'come', 'comes', 'come', 'concerning', 'concern', 'consequently', 'consider', 'considering', 'consider', 'contain', 'containing', 'contain', 'contains', 'contain', 'corresponding', 'correspond', 'could', 'could', "n't", 'not', 'course', 'currently', 'd', 'definitely', 'described', 'describe', 'despite', 'did', 'do', 'did', 'do', "n't", 'not', 'different', 'do', 'does', 'do', 'does', 'do', "n't", 'not', 'doing', 'do', 'do', "n't", 'not', 'done', 'do', 'down', 'downwards', 'downward', 'during', 'e', 'each', 'edu', 'eg', 'eight', 'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'f', 'far', 'few', 'fifth', 'first', 'five', 'followed', 'follow', 'following', 'follow', 'follows', 'follow', 'for', 'former', 'formerly', 'forth', 'four', 'from', 'further', 'furthermore', 'g', 'get', 
'gets', 'get', 'getting', 'get', 'given', 'give', 'gives', 'give', 'go', 'goes', 'go', 'going', 'go', 'gone', 'go', 'got', 'get', 'gotten', 'get', 'greetings', 'greeting', 'h', 'had', 'have', 'had', 'have', "n't", 'not', 'happens', 'happen', 'hardly', 'has', 'have', 'has', 'have', "n't", 'not', 'have', 'have', "n't", 'not', 'having', 'have', 'he', 'he', "'s", 'be', 'hello', 'help', 'hence', 'her', 'she', 'here', 'here', "'s", 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself', 'hi', 'him', 'he', 'himself', 'his', 'he', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i', 'i', "'d", 'would', 'i', '`', 'll', 'i', "'m", 'be', 'i', '`', 've', 'ie', 'if', 'ignored', 'ignore', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate', 'indicated', 'indicate', 'indicates', 'indicate', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'be', 'is', 'be', "n't", 'not', 'it', 'it', "'d", 'would', 'it', '`', 'll', 'it', "'s", 'be', 'its', 'itself', 'j', 'just', 'k', 'keep', 'keeps', 'keep', 'kept', 'keep', 'know', 'knows', 'know', 'known', 'know', 'l', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let', "'s", 'like', 'liked', 'like', 'likely', 'little', 'look', 'looking', 'look', 'looks', 'look', 'ltd', 'm', 'mainly', 'many', 'may', 'maybe', 'me', 'i', 'mean', 'meanwhile', 'merely', 'might', 'more', 'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'n', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary', 'need', 'needs', 'need', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone', 'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'o', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'on', 'once', 'one', 'ones', 'one', 'only', 'onto', 'or', 'other', 'others', 'other', 'otherwise', 'ought', 'our', 'we', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'p', 'particular', 'particularly', 'per', 'perhaps', 'placed', 
'place', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'provide', 'q', 'que', 'quite', 'qv', 'r', 'rather', 'rd', 're', 'really', 'reasonably', 'regarding', 'regard', 'regardless', 'regards', 'regard', 'relatively', 'respectively', 'right', 's', 'said', 'say', 'same', 'saw', 'say', 'saying', 'say', 'says', 'say', 'second', 'secondly', 'see', 'seeing', 'see', 'seem', 'seemed', 'seem', 'seeming', 'seem', 'seems', 'seem', 'seen', 'see', 'self', 'selves', 'self', 'sensible', 'sent', 'send', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should', 'should', "n't", 'not', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specify', 'specifying', 'specify', 'still', 'sub', 'such', 'sup', 'sure', 't', 't', "'s", 'take', 'taken', 'take', 'tell', 'tends', 'tend', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that', "'s", 'be', 'thats', 'that', 'the', 'their', 'they', 'theirs', 'them', 'they', 'themselves', 'then', 'thence', 'there', 'there', "'s", 'be', 'thereafter', 'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'they', "'d", 'would', 'they', '`', 'll', 'they', '`', 're', 'be', 'they', '`', 've', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'took', 'take', 'toward', 'towards', 'tried', 'try', 'tries', 'try', 'truly', 'try', 'trying', 'try', 'twice', 'two', 'u', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'up', 'upon', 'us', 'we', 'use', 'used', 'use', 'useful', 'uses', 'use', 'using', 'use', 'usually', 'uucp', 'v', 'value', 'various', 'very', 'via', 'viz', 'vs', 'w', 'want', 'wants', 'want', 'was', 'be', 'was', 'be', "n't", 'not', 'way', 'we', 'we', "'d", 'would', 'we', '`', 'll', 'we', '`', 're', 'be', 'we', '`', 've', 'welcome', 'well', 'went', 'go', 'were', 'be', 
'were', 'be', "n't", 'not', 'what', 'what', "'s", 'be', 'whatever', 'when', 'whence', 'whenever', 'where', 'where', "'s", 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'who', "'s", 'be', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within', 'without', 'wo', 'will', "n't", 'not', 'wonder', 'would', 'would', 'would', "n't", 'not', 'x', 'y', 'yes', 'yet', 'you', 'you', "'d", 'would', 'you', '`', 'll', 'you', '`', 're', 'be', 'you', '`', 've', 'your', 'you', 'yours', 'yourself', 'yourselves', 'yourselve', 'z', 'zero', 'i', 'a', 'about', 'an', 'a', 'are', 'be', 'as', 'at', 'be', 'by', 'com', 'for', 'from', 'how', 'in', 'is', 'be', 'it', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'be', 'what', 'when', 'where', 'who', 'will', 'with', 'the', 'www', 'a', 'about', 'above', 'across', 'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an', 'a', 'and', 'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'be', 'around', 'as', 'at', 'back', 'be', 'became', 'become', 'because', 'become', 'becomes', 'become', 'becoming', 'become', 'been', 'be', 'before', 'beforehand', 'behind', 'being', 'be', 'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by', 'call', 'can', 'can', 'not', 'cant', 'co', 'computer', 'con', 'could', 'couldnt', 'cry', 'de', 'describe', 'detail', 'do', 'done', 'do', 'down', 'due', 'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything', 'everywhere', 'except', 'few', 'fifteen', 'fify', 'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly', 'forty', 'found', 'find', 'four', 'from', 'front', 'full', 'further', 'get', 'give', 'go', 'had', 'have', 'has', 'have', 'hasnt', 
'have', 'he', 'hence', 'her', 'she', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herse', "''", 'him', 'he', 'himse', "''", 'his', 'he', 'how', 'however', 'hundred', 'i', 'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'be', 'it', 'its', 'itse', "''", 'keep', 'last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made', 'make', 'many', 'may', 'me', 'i', 'meanwhile', 'might', 'mill', 'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my', 'myse', "''", 'name', 'namely', 'neither', 'never', 'nevertheless', 'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or', 'other', 'others', 'other', 'otherwise', 'our', 'we', 'ours', 'ourselves', 'out', 'over', 'own', 'part', 'per', 'perhaps', 'please', 'put', 'rather', 're', 'same', 'see', 'seem', 'seemed', 'seem', 'seeming', 'seem', 'seems', 'seem', 'serious', 'several', 'she', 'should', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhere', 'still', 'such', 'system', 'take', 'ten', 'than', 'that', 'the', 'their', 'they', 'them', 'they', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these', 'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'top', 'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under', 'until', 'up', 'upon', 'us', 'we', 'very', 'via', 'was', 'be', 'we', 'well', 'were', 'be', 'what', 'whatever', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without', 'would', 'yet', 'you', 'your', 'you', 'yours', 'yourself', 'yourselves', 'yourselve', 'a', 'able', 
'about', 'above', 'abst', 'accordance', 'according', 'accord', 'accordingly', 'across', 'act', 'actually', 'added', 'add', 'adj', 'adopted', 'adopt', 'affected', 'affect', 'affecting', 'affect', 'affects', 'affect', 'after', 'afterwards', 'again', 'against', 'ah', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'an', 'a', 'and', 'announce', 'another', 'any', 'anybody', 'anyhow', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anyway', 'anywhere', 'apparently', 'approximately', 'are', 'be', 'aren', 'arent', 'arise', 'around', 'as', 'aside', 'ask', 'asking', 'ask', 'at', 'auth', 'available', 'away', 'awfully', 'b', 'back', 'be', 'became', 'become', 'because', 'become', 'becomes', 'become', 'becoming', 'become', 'been', 'be', 'before', 'beforehand', 'begin', 'beginning', 'begin', 'beginnings', 'beginning', 'begins', 'begin', 'behind', 'being', 'be', 'believe', 'below', 'beside', 'besides', 'between', 'beyond', 'biol', 'both', 'brief', 'briefly', 'but', 'by', 'c', 'ca', 'can', 'came', 'come', 'can', 'can', 'not', 'ca', 'can', "n't", 'not', 'cause', 'causes', 'cause', 'certain', 'certainly', 'co', 'com', 'come', 'comes', 'come', 'contain', 'containing', 'contain', 'contains', 'contain', 'could', 'couldnt', 'd', 'date', 'did', 'do', 'did', 'do', "n't", 'not', 'different', 'do', 'does', 'do', 'does', 'do', "n't", 'not', 'doing', 'do', 'done', 'do', 'do', "n't", 'not', 'down', 'downwards', 'downward', 'due', 'during', 'e', 'each', 'ed', 'edu', 'effect', 'eg', 'eight', 'eighty', 'either', 'else', 'elsewhere', 'end', 'ending', 'end', 'enough', 'especially', 'et', 'et-al', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'except', 'f', 'far', 'few', 'ff', 'fifth', 'first', 'five', 'fix', 'followed', 'follow', 'following', 'follow', 'follows', 'follow', 'for', 'former', 'formerly', 'forth', 'found', 'find', 'four', 'from', 'further', 'furthermore', 'g', 'gave', 'give', 'get', 
'gets', 'get', 'getting', 'get', 'give', 'given', 'give', 'gives', 'give', 'giving', 'give', 'go', 'goes', 'go', 'gone', 'go', 'got', 'get', 'gotten', 'get', 'h', 'had', 'have', 'happens', 'happen', 'hardly', 'has', 'have', 'has', 'have', "n't", 'not', 'have', 'have', "n't", 'not', 'having', 'have', 'he', 'hed', 'hence', 'her', 'she', 'here', 'hereafter', 'hereby', 'herein', 'heres', 'here', 'hereupon', 'hers', 'herself', 'hes', 'he', 'hi', 'hid', 'hide', 'him', 'he', 'himself', 'his', 'he', 'hither', 'home', 'how', 'howbeit', 'however', 'hundred', 'i', 'id', 'ie', 'if', 'i', '`', 'll', 'im', 'immediate', 'immediately', 'importance', 'important', 'in', 'inc', 'indeed', 'index', 'information', 'instead', 'into', 'invention', 'inward', 'is', 'be', 'is', 'be', "n't", 'not', 'it', 'itd', 'it', '`', 'll', 'its', 'itself', 'i', '`', 've', 'j', 'just', 'k', 'keep', 'keeps', 'keep', 'kept', 'keep', 'keys', 'key', 'kg', 'km', 'know', 'known', 'know', 'knows', 'know', 'l', 'largely', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'lets', 'let', 'like', 'liked', 'like', 'likely', 'line', 'little', '`', 'll', 'look', 'looking', 'look', 'looks', 'look', 'ltd', 'm', 'made', 'make', 'mainly', 'make', 'makes', 'make', 'many', 'may', 'maybe', 'me', 'i', 'mean', 'means', 'meantime', 'meanwhile', 'merely', 'mg', 'might', 'million', 'miss', 'ml', 'more', 'moreover', 'most', 'mostly', 'mr', 'mrs', 'mr', 'much', 'mug', 'must', 'my', 'myself', 'n', 'na', 'name', 'namely', 'nay', 'nd', 'near', 'nearly', 'necessarily', 'necessary', 'need', 'needs', 'need', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'ninety', 'no', 'nobody', 'non', 'none', 'nonetheless', 'noone', 'nor', 'normally', 'nos', 'no', 'not', 'noted', 'note', 'nothing', 'now', 'nowhere', 'o', 'obtain', 'obtained', 'obtain', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'omitted', 'omit', 'on', 'once', 'one', 'ones', 'one', 'only', 'onto', 'or', 'ord', 'other', 
'others', 'other', 'otherwise', 'ought', 'our', 'we', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'owing', 'owe', 'own', 'p', 'page', 'pages', 'page', 'part', 'particular', 'particularly', 'past', 'per', 'perhaps', 'placed', 'place', 'please', 'plus', 'poorly', 'possible', 'possibly', 'potentially', 'pp', 'predominantly', 'present', 'previously', 'primarily', 'probably', 'promptly', 'proud', 'provides', 'provide', 'put', 'q', 'que', 'quickly', 'quite', 'qv', 'r', 'ran', 'run', 'rather', 'rd', 're', 'readily', 'really', 'recent', 'recently', 'ref', 'refs', 'ref', 'regarding', 'regard', 'regardless', 'regards', 'regard', 'related', 'relatively', 'research', 'respectively', 'resulted', 'result', 'resulting', 'result', 'results', 'result', 'right', 'run', 's', 'said', 'say', 'same', 'saw', 'say', 'saying', 'say', 'says', 'say', 'sec', 'section', 'see', 'seeing', 'see', 'seem', 'seemed', 'seem', 'seeming', 'seem', 'seems', 'seem', 'seen', 'see', 'self', 'selves', 'self', 'sent', 'send', 'seven', 'several', 'shall', 'she', 'shed', 'she', '`', 'll', 'shes', 'she', 'should', 'should', "n't", 'not', 'show', 'showed', 'show', 'shown', 'show', 'showns', 'shown', 'shows', 'show', 'significant', 'significantly', 'similar', 'similarly', 'since', 'six', 'slightly', 'so', 'some', 'somebody', 'somehow', 'someone', 'somethan', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specifically', 'specified', 'specify', 'specify', 'specifying', 'specify', 'state', 'states', 'state', 'still', 'stop', 'strongly', 'sub', 'substantially', 'successfully', 'such', 'sufficiently', 'suggest', 'sup', 'sure', 't', 'take', 'taken', 'take', 'taking', 'take', 'tell', 'tends', 'tend', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that', '`', 'll', 'thats', 'that', 'that', '`', 've', 'the', 'their', 'they', 'theirs', 'them', 'they', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'thered', 'therefore', 'therein', 'there', '`', 'll', 
'thereof', 'therere', 'theres', 'thereto', 'thereupon', 'there', '`', 've', 'these', 'they', 'theyd', 'they', '`', 'll', 'theyre', 'they', '`', 've', 'think', 'this', 'those', 'thou', 'though', 'thoughh', 'thousand', 'throug', 'through', 'throughout', 'thru', 'thus', 'til', 'tip', 'to', 'together', 'too', 'took', 'take', 'toward', 'towards', 'tried', 'try', 'tries', 'try', 'truly', 'try', 'trying', 'try', 'ts', 't', 'twice', 'two', 'u', 'un', 'under', 'unfortunately', 'unless', 'unlike', 'unlikely', 'until', 'unto', 'up', 'upon', 'ups', 'up', 'us', 'we', 'use', 'used', 'use', 'useful', 'usefully', 'usefulness', 'uses', 'use', 'using', 'use', 'usually', 'v', 'value', 'various', '`', 've', 'very', 'via', 'viz', 'vol', 'vols', 'vol', 'vs', 'w', 'want', 'wants', 'want', 'was', 'be', 'was', 'be', "n't", 'not', 'way', 'we', 'wed', 'welcome', 'we', '`', 'll', 'went', 'go', 'were', 'be', 'were', 'be', "n't", 'not', 'we', '`', 've', 'what', 'whatever', 'what', '`', 'll', 'whats', 'what', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'wheres', 'where', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whim', 'whither', 'who', 'whod', 'whoever', 'whole', 'who', '`', 'll', 'whom', 'whomever', 'whos', 'who', 'whose', 'why', 'widely', 'willing', 'wish', 'with', 'within', 'without', 'wo', 'will', "n't", 'not', 'words', 'word', 'world', 'would', 'would', "n't", 'not', 'www', 'x', 'y', 'yes', 'yet', 'you', 'youd', 'you', '`', 'll', 'your', 'you', 'youre', 'yours', 'yourself', 'yourselves', 'yourselve', 'you', '`', 've', 'z', 'zero', 'me', 'ing', 're', 'how', 'real', 'county', 'student', 'yo', 'ma', 'mb', 'kb', 'gb', 'what', 'when', 'nice', 'win', 'finish', 'won', 'win']


# Word counts used to improve the word features, loaded from a pickle file.
# The path is relative to the current working directory.
# Previous absolute location: /home/hejiang/code/semihin/word_count_GCAT
#
# The file is opened in binary mode because pickle.load() requires a binary
# file object on Python 3 (text mode raises there); binary mode also works on
# Python 2. The `with` block closes the handle even if unpickling fails.
# NOTE(security): pickle can execute arbitrary code while loading, so this
# file must come from a trusted source.
with open('data/word_count_GCAT', 'rb') as f:
    word_count = pickle.load(f)

# Parameters
eps = 1e-05
alpha_candidates = [0.95, 0.9, 0.8]
label_percentage_candidate = [0.01]

# Candidate values keyed by setting name.
# NOTE(review): presumably the grid an experiment driver iterates over --
# confirm against the caller.
tries = {
    'scope': ['sim'],
    'alpha': [0.01],
    'label_percentage': [
        0.01, 0.02, 0.03, 0.04, 0.05,
        0.06, 0.07, 0.08, 0.09, 0.10,
    ],
}

# Parameter sets for the ensemble of typed linkage graphs. Each set fixes a
# scope name, an alpha value, a label percentage and the list of type names;
# tt2 and tt3 additionally carry a 'types4' list of two-word type pairs.
tt1 = {
    'scope': 'diff',
    'alpha': 0.01,
    'label_percentage': 0.01,
    'types': [
        'computer', 'astronomy', 'organization', 'government',
        'business', 'automotive', 'spaceflight', '',
    ],
}

tt2 = {
    'scope': 'sim',
    'alpha': 0.5,
    'label_percentage': 0.01,
    'types': [
        'medicine', 'computer', 'sports', 'religion',
        'automotive', 'spaceflight', '',
    ],
    'types4': [
        'sports military', 'sports organization',
        'religion law', 'spaceflight chemistry',
    ],
}

tt3 = {
    'scope': 'diff',
    'alpha': 0.5,
    'label_percentage': 0.3,
    'types': [
        'medicine', 'computer', 'sports', 'religion',
        'automotive', 'spaceflight', '',
    ],
    'types4': [
        'sports military', 'sports organization',
        'religion law', 'spaceflight chemistry',
    ],
}

# The parameter set currently in use.
typed_tries = tt2
# Repeats
repeats = 10

# Co-occurrence links: connect multiple entities that are in the same graph.
# Both the weight of these links and the level at which they are built can
# be adjusted.
co_occurrence_link = True
# Level options:
# - 's' : sentence-level co-occurrence
# - 'd' : document-level co-occurrence
co_occurrence_level = 's'
co_occurrence_weight = 10

# Weight balancing the dwd path against the ded path.
plain_dwd_weight = 0.112


# Minimum and maximum word-frequency thresholds.
word_freq_min_threshold, word_freq_max_threshold = 30, 10000

# Output parameters: the three verbosity switches are all enabled.
time_verbose = summary_verbose = result_verbose = True

# Output locations (relative paths). The result path embeds the scope name
# of the typed parameter set currently selected.
summary_path = 'result/summary.txt'
result_path = 'galm/' + typed_tries['scope'] + '/labels.txt'

# Experiment type, one of:
#   'plain' - all entity paths are treated as equal
#   'dwd'   - entity paths plus a dwd path
#   'typed' - type-constrained graph
graph_generator = 'dwd'
